Not part of the final delivery file.
We include this only in case the professor would like to take a look at the first attempts at exploration and visualization that we made as a preliminary part of our project.
import lxml.etree as etree
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Prefix 'd' is bound to the default namespace of the TCX tree.
ns = dict(d='http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2')
xtree = etree.parse('./Data/activities/264774936.tcx')
xroot = xtree.getroot()
# findall() returns a list; the file is expected to hold a single Activity,
# so its first entry is taken.
Activity = xroot.findall(".//d:Activity", namespaces=ns)[0]
Creating the dataframe from the raw data:
# Build one row per Trackpoint, tagged with the zero-based number of its Lap.
df_cols = ["time", "latitude", "longitude", "altitude", "distance", "heartRate", "lap"]


def _trackpoint_value(point, path, cast):
    """Return the text found at `path` below `point`, converted with `cast`.

    Gives NaN when the child element is missing or empty (e.g. a Trackpoint
    without a Position or HeartRateBpm child), so that an incomplete
    trackpoint yields a row with gaps instead of aborting the whole import.
    """
    raw = point.findtext(path, namespaces=ns)
    if raw is None or not raw.strip():
        return np.nan
    return cast(raw.strip())


rows = []
lapnum = 0
for lap in Activity.findall("./d:Lap", ns):
    for elem in lap.findall("./d:Track/d:Trackpoint", ns):
        rows.append({
            "time": _trackpoint_value(elem, "./d:Time", str),
            "latitude": _trackpoint_value(elem, "./d:Position/d:LatitudeDegrees", float),
            "longitude": _trackpoint_value(elem, "./d:Position/d:LongitudeDegrees", float),
            "altitude": _trackpoint_value(elem, "./d:AltitudeMeters", float),
            "distance": _trackpoint_value(elem, "./d:DistanceMeters", float),
            "heartRate": _trackpoint_value(elem, "./d:HeartRateBpm/d:Value", int),
            "lap": lapnum,
        })
    # Every trackpoint of the next Lap element gets the next lap number.
    lapnum = lapnum + 1

out_df = pd.DataFrame(rows, columns=df_cols)
out_df['time'] = pd.to_datetime(out_df.time)
# NOTE: the results of the two calls below are not assigned (they were only
# displayed in the notebook). out_df keeps its integer index, which the later
# `.loc[<row number>, ...]` accesses rely on.
out_df.set_index('time')
out_df.describe()
# Heart rate along the covered distance; the returned Axes is kept in `plot`.
plot = out_df.plot.line(x='distance', y='heartRate',
                        title='heartrate by distance', figsize=(15, 5))

# Explore whether a lap change comes with a heart-rate change by looking at
# the first record of every lap.
df_startlap = out_df.groupby('lap').first()
# (Disabled idea: draw a red dashed vertical line on `plot` at each lap start.)
df_startlap.plot.line(y='heartRate', title='first heartrate in lap', figsize=(15, 5))
It seems there is no correlation between the start of a lap and the heart rate; a lap is probably just something that gets triggered by the user.
We now try to enrich the dataset with the instantaneous pace, calculated from distance and time, and to understand whether it is a reliable way to estimate the actual pace.
# Instantaneous speed from consecutive distance samples.
# Hypothesis: all records are 1 second apart, so the metres gained between two
# consecutive records equal the speed in m/s.
distance_m = out_df['distance'].to_numpy(dtype=float)
instant_kmh = np.zeros(len(distance_m))  # the first record is arbitrarily set to 0
instant_kmh[1:] = np.diff(distance_m) * 3.6  # conversion from m/s to km/h
# One vectorized column assignment instead of a per-row `.loc` loop.
out_df['Instant_pace'] = instant_kmh
out_df
It is almost reliable, but there are many huge drops followed by huge increases; it is probably better to understand whether this is normal behaviour or whether the data need to be "normalized". We try to do this by increasing the window size. Increasing the window is only one way to do this; we could probably also normalize by means of some kind of averaging.
# Try with a window size of 3: speed averaged over the last 3 records.
# NOTE: out_df_norm is an alias of out_df (no copy is made), so the
# 'Instant_pace' column of out_df is overwritten as well.
out_df_norm = out_df
# Hypothesis: all records are 1 second apart.
distance_m = out_df_norm['distance'].to_numpy(dtype=float)
smoothed_kmh = np.zeros(len(distance_m))  # the first 3 records stay at 0
smoothed_kmh[3:] = ((distance_m[3:] - distance_m[:-3]) / 3) * 3.6  # conversion to km/h
# One vectorized column assignment instead of a per-row `.loc` loop.
out_df_norm['Instant_pace'] = smoothed_kmh
out_df_norm.plot(title='instant_pace by time in km/h', x='time', y='Instant_pace',
                 kind='line', figsize=(15, 5))
# Try with a window size of w records.
# NOTE: out_df_norm is an alias of out_df (no copy is made), so the
# 'Instant_pace' column of out_df is overwritten as well.
out_df_norm = out_df
w = 10
# Hypothesis: all records are 1 second apart.
distance_m = out_df_norm['distance'].to_numpy(dtype=float)
row = np.arange(len(distance_m))
# Records covered by the window: it grows 0, 1, ..., w during the first w
# records (measured from the first record) and then stays at w.
span = np.minimum(row, w)
smoothed_kmh = np.zeros(len(distance_m))  # the first record is set to 0
smoothed_kmh[1:] = ((distance_m[1:] - distance_m[row[1:] - span[1:]]) / span[1:]) * 3.6  # conversion to km/h
# One vectorized column assignment instead of two per-row `.loc` loops; it
# also works when the frame has fewer than w rows.
out_df_norm['Instant_pace'] = smoothed_kmh
out_df_norm.plot(title='instant_pace by time in km/h', x='time', y='Instant_pace',
                 kind='line', figsize=(15, 5))
Even with a large normalization factor there are drops and rises; we could check whether they are due to other factors!
# Compare pace with altitude: draw each of them over time in its own figure.
for chart_title, column in (('instant_pace by time', 'Instant_pace'),
                            ('altitude by time', 'altitude')):
    out_df.plot.line(x='time', y=column, title=chart_title, figsize=(15, 5))
The altitude remains almost the same during the entire run, so it cannot be a factor.
# To reach something pseudo-stable we would need to normalize a lot:
# try with a window size of w records.
# NOTE: out_df_norm is an alias of out_df (no copy is made), so the
# 'Instant_pace' column of out_df is overwritten as well.
out_df_norm = out_df
w = 60
# Hypothesis: all records are 1 second apart.
distance_m = out_df_norm['distance'].to_numpy(dtype=float)
row = np.arange(len(distance_m))
# Records covered by the window: it grows 0, 1, ..., w during the first w
# records (measured from the first record) and then stays at w.
span = np.minimum(row, w)
smoothed_kmh = np.zeros(len(distance_m))  # the first record is set to 0
smoothed_kmh[1:] = ((distance_m[1:] - distance_m[row[1:] - span[1:]]) / span[1:]) * 3.6  # conversion to km/h
# One vectorized column assignment instead of two per-row `.loc` loops; it
# also works when the frame has fewer than w rows.
out_df_norm['Instant_pace'] = smoothed_kmh
out_df_norm.plot(title='instant_pace by time', x='time', y='Instant_pace',
                 kind='line', figsize=(15, 5))
# Load a second activity and flatten it into out_df2, one row per Trackpoint.
xtree = etree.parse('./Data/activities/1066731959.tcx')
xroot = xtree.getroot()
# ns maps the prefix 'd' to the default namespace of the tree
ns = {'d': 'http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2'}
# The file is expected to hold a single Activity, so the first match is taken.
Activity = xroot.findall(".//d:Activity", ns)[0]
df_cols = ["time", "latitude", "longitude", "altitude", "distance", "heartRate", "lap"]


def _trackpoint_value(point, path, cast):
    """Return the text found at `path` below `point`, converted with `cast`.

    Gives NaN when the child element is missing or empty (e.g. a Trackpoint
    without a Position or HeartRateBpm child), so that an incomplete
    trackpoint yields a row with gaps instead of aborting the whole import.
    """
    raw = point.findtext(path, namespaces=ns)
    if raw is None or not raw.strip():
        return np.nan
    return cast(raw.strip())


rows = []
lapnum = 0
for lap in Activity.findall("./d:Lap", ns):
    for elem in lap.findall("./d:Track/d:Trackpoint", ns):
        rows.append({
            "time": _trackpoint_value(elem, "./d:Time", str),
            "latitude": _trackpoint_value(elem, "./d:Position/d:LatitudeDegrees", float),
            "longitude": _trackpoint_value(elem, "./d:Position/d:LongitudeDegrees", float),
            "altitude": _trackpoint_value(elem, "./d:AltitudeMeters", float),
            "distance": _trackpoint_value(elem, "./d:DistanceMeters", float),
            "heartRate": _trackpoint_value(elem, "./d:HeartRateBpm/d:Value", int),
            "lap": lapnum,
        })
    # Every trackpoint of the next Lap element gets the next lap number.
    lapnum = lapnum + 1

out_df2 = pd.DataFrame(rows, columns=df_cols)
out_df2['time'] = pd.to_datetime(out_df2.time)
# NOTE: the result is not assigned; out_df2 keeps its integer index, which the
# later `.loc[<row number>, ...]` accesses rely on.
out_df2.set_index('time')
# Add the instantaneous speed to the second dataset to check whether it is a
# reliable way to estimate the actual pace.
# Hypothesis: all records are 1 second apart, so the metres gained between two
# consecutive records equal the speed in m/s.
distance_m = out_df2['distance'].to_numpy(dtype=float)
instant_kmh = np.zeros(len(distance_m))  # the first record is set to 0
instant_kmh[1:] = np.diff(distance_m) * 3.6  # conversion to km/h
# One vectorized column assignment instead of a per-row `.loc` loop.
out_df2['Instant_pace'] = instant_kmh
out_df2.plot(title='speed by time in km/h', x='time', y='Instant_pace',
             kind='line', figsize=(15, 5))
# TODO: we could reuse the previous dataset and only add a "normalized" column.
# Try with a window size of w records.
# NOTE: out_df2_norm is an alias of out_df2 (no copy is made), so the
# 'Instant_pace' column of out_df2 is overwritten as well.
out_df2_norm = out_df2
w = 60
# Hypothesis: all records are 1 second apart.
distance_m = out_df2_norm['distance'].to_numpy(dtype=float)
row = np.arange(len(distance_m))
# Records covered by the window: it grows 0, 1, ..., w during the first w
# records (measured from the first record) and then stays at w.
span = np.minimum(row, w)
smoothed_kmh = np.zeros(len(distance_m))  # the first record is set to 0
smoothed_kmh[1:] = ((distance_m[1:] - distance_m[row[1:] - span[1:]]) / span[1:]) * 3.6  # conversion to km/h
# One vectorized column assignment instead of two per-row `.loc` loops; it
# also works when the frame has fewer than w rows.
out_df2_norm['Instant_pace'] = smoothed_kmh
out_df2_norm.plot(title='instant_pace by time in km/h', x='time', y='Instant_pace',
                  kind='line', figsize=(15, 5))
We now try to build a dataset by discretizing the original dataset (out_df) into blocks of roughly a fixed distance (100 m). We keep the distance and time values as they were in out_df, and we calculate the heartRate as the average of all the values inside the block. Finally, we calculate the speed as:
(time_at_end_of_block - time_at_start_of_block) / (distance_at_end_of_block - distance_at_start_of_block)
and apply some conversion factors (from seconds to minutes and from meters to km).
Notice that the speed is now expressed in "minutes/km", as requested by the professor, since the exploratory phase is almost over.
A drawback of this way of normalizing the speed is that we "lose" a lot of data points, which get summarized, resulting in lower accuracy. Furthermore, not all the blocks are composed of the same number of data points from the original dataset.
# Sample the run roughly every 100 m: each sample keeps the distance of the
# first record at or beyond i*100 m, that record's row number as its time, and
# the mean heart rate of the records seen since the previous sample.
# Hypothesis: all records are 1 second apart, so the row number is used as the
# elapsed time in seconds.
total_distance = out_df['distance'].iloc[-1]  # hoisted; avoids float(Series), deprecated in recent pandas
samples = []
i = 0
x = 0
# [sum of heartRate, number of records]; seeded with the first record so that
# the sample taken at distance 0 carries that record's heart rate.
heartsum = [out_df.loc[x, 'heartRate'], 1]
# The `x < len(out_df)` guard stops the scan at the last row even if a single
# record jumps over more than one 100 m threshold.
while i * 100 <= total_distance and x < len(out_df):
    if out_df.loc[x, 'distance'] >= i * 100:
        samples.append({
            'distance': out_df.loc[x, 'distance'],
            'time': x,
            'heartRate': heartsum[0] / heartsum[1],
        })
        heartsum = [0, 0]
        i = i + 1
    heartsum[0] = heartsum[0] + out_df.loc[x, 'heartRate']
    heartsum[1] = heartsum[1] + 1
    x = x + 1

# Pace between consecutive samples in minutes/km; the first sample has no
# predecessor and is left without a value (NaN in the frame).
for previous, current in zip(samples, samples[1:]):
    current['speed'] = (((current['time'] - previous['time']) / 60)
                        / ((current['distance'] - previous['distance']) / 1000))

# Building the frame once from the collected rows gives numeric columns
# instead of growing an empty object-typed frame cell by cell.
speed_df = pd.DataFrame(samples, columns=['speed', 'distance', 'time', 'heartRate'])
speed_df.plot(title='pace by minute in minutes/km', x='distance', y='speed',
              kind='line', figsize=(15, 5))
We now go back to the sliding-window approach, with the pace calculated in min/km. To calculate this value, similarly to what we did before, we set a window of 60 seconds, so as to enrich each data point of the original dataset with the average pace over the 60 seconds before the considered point.
In the first 60 seconds, the size of the window is set to the number of seconds elapsed since the beginning, and the distance is calculated from the first point.
If the distance covered inside the window is zero, for example because the runner stopped for a while, the pace is set to NaN, because otherwise it would go to infinity. The same is done for very slow paces (10 min/km or more), in order not to introduce noise in the visualizations.
# Pace in min/km over a sliding window of w records.
out_df_norm = out_df_norm.sort_values("time").reset_index(drop=True)
w = 60
slowest_pace = 10  # min/km; paces of this value or more are stored as NaN
# Hypothesis: all records are 1 second apart.
distance_m = out_df_norm['distance'].to_numpy(dtype=float)
row = np.arange(len(distance_m))
# Records (seconds) covered by the window: it grows 0, 1, ..., w during the
# first w records (measured from the first record) and then stays at w.
span = np.minimum(row, w)
covered_m = distance_m - distance_m[row - span]
# Zero covered distance yields inf/NaN here; those entries are masked below.
with np.errstate(divide='ignore', invalid='ignore'):
    pace = (span / 60) / (covered_m / 1000)
# The same rule is applied to every row: keep the pace only when some distance
# was covered and the pace is below the slow-pace limit. Previously the limit
# was enforced only during the first w records.
keep = (covered_m > 0) & (pace < slowest_pace)
out_df_norm['pace min/km'] = np.where(keep, pace, np.nan)
out_df_norm.plot(title='pace by minute in minutes/km', x='distance', y='pace min/km',
                 kind='line', figsize=(15, 5))
# Visualization attempt: one point per record, heart rate against the
# 'Instant_pace' column.
out_df.plot.scatter(x='heartRate', y='Instant_pace',
                    title='instant_pace by heartbeat in km/h', figsize=(15, 5))
#we could still think of encoding the time with color, or of doing something similar to snake plots
#import
import plotly
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
# Interactive line chart: heart rate over time.
trace_one = go.Scatter(
    x=out_df['time'],
    y=out_df['heartRate'],
    name="heartRate vs time",
    line={'color': '#17BECF'},
    opacity=0.8,
)
data = [trace_one]
layout = {'title': 'heartRate vs time'}
fig = {'data': data, 'layout': layout}
iplot(fig, filename='heartRate vs time')

# Interactive line chart: the 'Instant_pace' column over distance.
trace_one = go.Scatter(
    x=out_df['distance'],
    y=out_df['Instant_pace'],
    name="Instant_pace vs distance",
    line={'color': '#17BECF'},
    opacity=0.8,
)
data = [trace_one]
layout = {'title': 'Instant_pace vs distance'}
fig = {'data': data, 'layout': layout}
iplot(fig, filename='Instant_pace vs distance')
# Marker-only scatter: pace (min/km) on x, heart rate on y.
pace_column = out_df_norm['pace min/km']
heart_column = out_df_norm['heartRate']
trace = go.Scatter(x=pace_column, y=heart_column, mode='markers')
data = [trace]
iplot(data)
# Bar chart: pace (min/km) for each heart-rate value.
# The trace name used to be 'Rounds Played', a leftover from an unrelated
# example; it now names what the bars show.
trace1 = go.Bar(
    x=out_df_norm['heartRate'],
    y=out_df_norm['pace min/km'],
    name='pace min/km by heartRate')
data = [trace1]
layout = go.Layout(barmode='group')
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='grouped-bar')

# Same chart for the 'Instant_pace' column of out_df.
trace1 = go.Bar(
    x=out_df['heartRate'],
    y=out_df['Instant_pace'],
    name='Instant_pace by heartRate')
data = [trace1]
layout = go.Layout(barmode='group')
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='grouped-bar')
# 2D density (heat-map contours) of heart rate against pace, with the single
# records drawn on top as small white markers: per-record dataset first.
y = out_df_norm['heartRate']
x = out_df_norm['pace min/km']
density_layer = go.Histogram2dContour(x=x, y=y, contours={'coloring': 'heatmap'})
points_layer = go.Scatter(x=x, y=y, mode='markers',
                          marker={'color': 'white', 'size': 3, 'opacity': 0.3})
iplot([density_layer, points_layer], show_link=False)

# Same view for the 100 m samples.
y = speed_df['heartRate']
x = speed_df['speed']  # TODO: rename 'speed' to pace in min/km
density_layer = go.Histogram2dContour(x=x, y=y, contours={'coloring': 'heatmap'})
points_layer = go.Scatter(x=x, y=y, mode='markers',
                          marker={'color': 'white', 'size': 3, 'opacity': 0.3})
iplot([density_layer, points_layer], show_link=False)
import plotly.figure_factory as ff
# 2D density figure (figure factory) of heart rate against pace in min/km.
y = out_df_norm['heartRate']
x = out_df_norm['pace min/km']
colorscale = ['#7A4579', '#D56073', 'rgb(236,158,105)', (1, 1, 0.2), (0.98,0.98,0.98)]
fig = ff.create_2d_density(x=x, y=y,
                           colorscale=colorscale,
                           hist_color='rgb(0, 0, 255)',
                           point_size=1)
iplot(fig, filename='histogram_subplots')
import chart_studio.plotly as py
import plotly.graph_objs as go
import chart_studio
import pandas as pd
# Setting user, API key and access token.
# SECURITY: these credentials are committed in the source. Environment
# variables now take precedence; the literals remain only as a fallback so the
# script keeps running unchanged. They should be rotated and the fallbacks
# removed.
import os

chart_studio.tools.set_credentials_file(
    username=os.environ.get('PLOTLY_USERNAME', 'pavmassimo'),
    api_key=os.environ.get('PLOTLY_API_KEY', 'YLSreh15YEIWWNq75Lxx'),
)
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoicGF2bWFzc2ltbyIsImEiOiJja2RzeGh4cHYxc2w1MnJ0YWczZndyZG50In0.GKQETZ2daXd-VrybJ571DQ',
)
# One scattermapbox trace per lap.
data = []
# The lap numbers are taken from the data instead of a hard-coded 0..8 list,
# so runs with a different number of laps are fully drawn.
laps = sorted(int(lap) for lap in out_df['lap'].unique())
for lap in laps:
    in_lap = out_df['lap'] == lap  # row mask, computed once per lap
    lap_data = dict(
        lat=out_df.loc[in_lap, 'latitude'],
        lon=out_df.loc[in_lap, 'longitude'],
        name=lap,
        marker=dict(size=8, opacity=0.8),
        type='scattermapbox'
    )
    data.append(lap_data)
# Layout of the lap map.
layout = {
    'height': 800,
    # top, bottom, left and right margins
    'margin': {'t': 0, 'b': 0, 'l': 0, 'r': 0},
    'font': {'color': '#FFFFFF', 'size': 11},
    'paper_bgcolor': '#000000',
    'mapbox': {
        # token issued by Mapbox
        'accesstoken': mapbox_access_token,
        'bearing': 0,
        # point the map is centered on
        'center': {'lat': 45.437, 'lon': 9.243},
        # no tilt: the map stays "parallel" to the screen
        'pitch': 0,
        # default level of zoom
        'zoom': 15,
        # default map style
        'style': 'dark',
    },
}
# Caption shown on the map. The previous text described US storm events, a
# leftover from an unrelated example; it now describes this figure.
annotations = [dict(
    # text to display; <br> breaks it into two lines
    text='GPS track of the run,<br> one trace per lap',
    # font and border characteristics
    font=dict(color='#FFFFFF', size=14), borderpad=10,
    # position, in paper coordinates
    x=0.05, y=0.05, xref='paper', yref='paper', align='left',
    # don't show an arrow and set the background color
    showarrow=False, bgcolor='black'
)]
# assigning the annotations to the layout
layout['annotations'] = annotations
# Drop-down 1: map styles menu, one button per alternative map style.
map_styles = [
    ('dark', 'Dark'),
    ('light', 'Light'),
    ('outdoors', 'Outdoors'),
    ('satellite-streets', 'Satellite with Streets'),
]
style_menu = dict(
    buttons=[
        dict(args=['mapbox.style', style], label=label, method='relayout')
        for style, label in map_styles
    ],
    # direction in which the menu expands when clicked
    direction='up',
    # position of the drop-down on the map
    x=0.75,
    xanchor='left',
    y=0.05,
    yanchor='bottom',
    # font size and colors
    bgcolor='#000000',
    bordercolor='#FFFFFF',
    font=dict(size=11)
)

# Drop-down 2: select the lap to visualize. The first button shows every
# trace; each following button shows exactly one. The visibility lists are
# sized from the trace list `data`, so every lap trace gets a button (the
# previous hard-coded lists had 7 entries regardless of the trace count).
n_traces = len(data)
lap_buttons = [
    dict(label='All Laps',
         method='update',
         args=[{'visible': [True] * n_traces}])
]
for shown in range(n_traces):
    lap_buttons.append(
        dict(label=str(shown),
             method='update',
             args=[{'visible': [position == shown for position in range(n_traces)]}])
    )
lap_menu = dict(
    buttons=lap_buttons,
    # direction in which the drop-down expands when opened
    direction='down',
    # position of the drop-down on the map
    x=0.01,
    xanchor='left',
    y=0.99,
    yanchor='bottom',
    # fonts and border
    bgcolor='#000000',
    bordercolor='#FFFFFF',
    font=dict(size=11)
)

updatemenus = [style_menu, lap_menu]
# assign the list of dictionaries to the layout dictionary
layout['updatemenus'] = updatemenus
layout['title'] = 'Pere miscredente'
# One scattermapbox trace per kilometre of the run; hovering a point shows its
# distance value.
data1 = []
# /1000 because distance is registered in meters
kms = list(range(0, int(max(out_df['distance'])/1000)+1))
for km in kms:
    # rows whose distance lies in [km*1000, (km+1)*1000)
    in_km = (out_df['distance'] >= (km*1000)) & (out_df['distance'] < (km+1) *1000)
    km_data = {
        'lat': out_df.loc[in_km, 'latitude'],
        'lon': out_df.loc[in_km, 'longitude'],
        'customdata': out_df.loc[in_km, 'distance'],
        'hovertemplate': "%{customdata}",
        'name': km,
        'marker': {'size': 8, 'opacity': 0.8},
        'type': 'scattermapbox',
    }
    data1.append(km_data)
# Layout of the per-kilometre map.
layout = {
    'height': 800,
    # top, bottom, left and right margins
    'margin': {'t': 0, 'b': 0, 'l': 0, 'r': 0},
    'font': {'color': '#FFFFFF', 'size': 11},
    'paper_bgcolor': '#000000',
    'mapbox': {
        # token issued by Mapbox
        'accesstoken': mapbox_access_token,
        'bearing': 0,
        # the map is centered on the first recorded position
        'center': {
            'lat': out_df['latitude'][0],
            'lon': out_df['longitude'][0],
        },
        # no tilt: the map stays "parallel" to the screen
        'pitch': 0,
        # default level of zoom
        'zoom': 15,
        # default map style
        'style': 'dark',
    },
}
# No annotation is attached to this layout: the annotation block that followed
# here was disabled (it was wrapped in an unused string literal).
# Buttons of the kilometre selector. There are n kilometre traces but n+1
# options: the first shows all of them, each of the others shows a single
# kilometre.
n_km = int(max(out_df['distance'])/1000)+1
visibility = [False] * n_km
visibility_all = [True] * n_km
buttons = [
    {'label': 'All kms',
     'method': 'update',
     'args': [{'visible': visibility_all}]}
]
for item in range(0, n_km):
    # only the trace of kilometre `item` stays visible
    visibility_partial = [position == item for position in range(n_km)]
    buttons.append(
        {'label': item,
         'method': 'update',
         'args': [{'visible': visibility_partial}]}
    )
# Drop-down 2: select the km to visualize.
viz = {
    'buttons': buttons,
    # direction in which the drop-down expands when opened
    'direction': 'down',
    # position of the drop-down
    'x': 0.01,
    'xanchor': 'left',
    'y': 0.99,
    'yanchor': 'bottom',
    # fonts and border
    'bgcolor': '#000000',
    'bordercolor': '#FFFFFF',
    'font': {'size': 11},
}
# Drop-down 1: map styles menu, one button per alternative map style.
map_styles = (
    ('dark', 'Dark'),
    ('light', 'Light'),
    ('outdoors', 'Outdoors'),
    ('satellite-streets', 'Satellite with Streets'),
)
style_menu = {
    'buttons': [
        {'args': ['mapbox.style', style], 'label': label, 'method': 'relayout'}
        for style, label in map_styles
    ],
    # direction in which the menu expands when clicked
    'direction': 'up',
    # position of the drop-down on the map
    'x': 0.75,
    'xanchor': 'left',
    'y': 0.05,
    'yanchor': 'bottom',
    # font size and colors
    'bgcolor': '#000000',
    'bordercolor': '#FFFFFF',
    'font': {'size': 11},
}
# The style menu comes first, the kilometre selector second.
updatemenus = [style_menu, viz]
# assign the list of dictionaries to the layout dictionary
layout['updatemenus'] = updatemenus
# Mirrored horizontal bar chart of the mean heart rate of every 10 consecutive
# 100 m samples of speed_df (a trailing group of fewer than 10 is ignored).
sample_hr = speed_df['heartRate'].tolist()
avg_hr = [sum(sample_hr[start:start + 10]) / 10
          for start in range(0, len(sample_hr) - 9, 10)]
# Negated copy, drawn to the left of the axis.
avg_hr2 = [-value for value in avg_hr]
# One y position per bar, instead of a fixed 1..9 list that silently limited
# the number of bars drawn.
y = list(range(1, len(avg_hr) + 1))
# Axis titles and trace names used to be 'Number', 'Men' and 'Women',
# leftovers from an unrelated example; they now match the later charts.
layout = go.Layout(yaxis=go.layout.YAxis(title='KM'),
                   xaxis=go.layout.XAxis(
                       range=[-220, 220],
                       tickvals=[-200, -100, -50, 0, 50, 100, 200],
                       # both sides are labelled with positive values
                       ticktext=[200, 100, 50, 0, 50, 100, 200],
                       title='BPM'),
                   barmode='overlay',
                   bargap=0.1)
data = [go.Bar(y=y,
               x=avg_hr,
               orientation='h',
               name='RUN A',
               text=np.array(avg_hr).astype('int'),
               hoverinfo='text',
               marker=dict(color='powderblue')
               ),
        go.Bar(y=y,
               x=avg_hr2,
               orientation='h',
               name='RUN B',
               text=-1 * np.array(avg_hr2).astype('int'),
               hoverinfo='text',
               marker=dict(color='seagreen')
               )]
py.iplot(dict(data=data, layout=layout))
# Mirrored horizontal bar chart of the mean heart rate of every kilometre.
avg_hr = []
i = 1  # next kilometre threshold, in km
x = 0
# [sum of heartRate, number of records] of the kilometre being scanned. It
# starts empty: seeding the sum with the first heart rate (as before) counted
# that record twice in the first kilometre's average.
heartsum = [0, 0]
while x < len(out_df):
    if out_df.loc[x, 'distance'] >= i * 1000:
        # threshold reached: close the current kilometre (0 if it had no records)
        avg_hr.append(heartsum[0] / heartsum[1] if heartsum[1] else 0)
        heartsum = [0, 0]
        i = i + 1
    heartsum[0] = heartsum[0] + out_df.loc[x, 'heartRate']
    heartsum[1] = heartsum[1] + 1
    x = x + 1
# last, possibly partial, kilometre (0 if there were no records at all)
avg_hr.append(heartsum[0] / heartsum[1] if heartsum[1] else 0)
print(avg_hr)
# Negated copy, drawn to the left of the axis.
avg_hr2 = [-value for value in avg_hr]
# One y position per bar, instead of a fixed 1..19 list that silently limited
# the number of bars drawn.
y = list(range(1, len(avg_hr) + 1))
layout = go.Layout(yaxis=go.layout.YAxis(title='KM'),
                   xaxis=go.layout.XAxis(
                       range=[-220, 220],
                       tickvals=[-200, -100, -50, 0, 50, 100, 200],
                       # both sides are labelled with positive values
                       ticktext=[200, 100, 50, 0, 50, 100, 200],
                       title='BPM'),
                   barmode='overlay',
                   bargap=0.1)
data = [go.Bar(y=y,
               x=avg_hr,
               orientation='h',
               name='RUN A',
               text=np.array(avg_hr).astype('int'),
               hoverinfo='text',
               marker=dict(color='powderblue')
               ),
        go.Bar(y=y,
               x=avg_hr2,
               orientation='h',
               name='RUN B',
               text=-1 * np.array(avg_hr2).astype('int'),
               hoverinfo='text',
               marker=dict(color='seagreen')
               )]
py.iplot(dict(data=data, layout=layout))
# Mirrored horizontal bar chart of the mean heart rate per 1-unit bin of the
# 'Instant_pace' column (km/h).
out_df_norm = out_df_norm.sort_values("Instant_pace")
out_df_norm = out_df_norm.reset_index(drop=True)
avg_hr = []
i = 1  # upper bound of the bin being filled
x = 0
heartsum = [0, 0]  # [sum of heartRate, number of records] of the open bin
while x < len(out_df_norm):
    # Close every bin whose upper bound has been reached; empty bins get 0.
    while out_df_norm.loc[x, 'Instant_pace'] >= i:
        avg_hr.append(heartsum[0] / heartsum[1] if heartsum[1] else 0)
        heartsum = [0, 0]
        i = i + 1
    heartsum[0] = heartsum[0] + out_df_norm.loc[x, 'heartRate']
    heartsum[1] = heartsum[1] + 1
    x = x + 1
# last open bin (0 if there were no records at all)
avg_hr.append(heartsum[0] / heartsum[1] if heartsum[1] else 0)
print(avg_hr)
# Negated copy, drawn to the left of the axis.
avg_hr2 = [-value for value in avg_hr]
# One y position per bar, instead of a fixed 1..19 list that silently limited
# the number of bars drawn.
y = list(range(1, len(avg_hr) + 1))
layout = go.Layout(yaxis=go.layout.YAxis(title='KM/H'),
                   xaxis=go.layout.XAxis(
                       range=[-220, 220],
                       tickvals=[-200, -100, -50, 0, 50, 100, 200],
                       # both sides are labelled with positive values
                       ticktext=[200, 100, 50, 0, 50, 100, 200],
                       title='BPM'),
                   barmode='overlay',
                   bargap=0.1)
data = [go.Bar(y=y,
               x=avg_hr,
               orientation='h',
               name='RUN A',
               text=np.array(avg_hr).astype('int'),
               hoverinfo='text',
               marker=dict(color='powderblue')
               ),
        go.Bar(y=y,
               x=avg_hr2,
               orientation='h',
               name='RUN B',
               text=-1 * np.array(avg_hr2).astype('int'),
               hoverinfo='text',
               marker=dict(color='seagreen')
               )]
py.iplot(dict(data=data, layout=layout))
# Mean heart rate per 0.5 min/km pace bin, drawn as mirrored horizontal bars
# colored by the number of records in each bin.
out_df_norm = out_df_norm.sort_values("pace min/km")
out_df_norm = out_df_norm.reset_index(drop=True)
avg_hr = []
count = []
i = 6  # upper bound of the open bin in half-minutes: i / 2 = 3.0 min/km at the start
x = 0
heartsum = [0, 0]  # [sum of heartRate, number of records] of the open bin
# Rows without a pace (NaN) are sorted last, so the scan stops at the first one.
while (x < len(out_df_norm)) and not np.isnan(out_df_norm.loc[x, 'pace min/km']):
    # Close every bin whose upper bound has been reached; empty bins get 0.
    while out_df_norm.loc[x, 'pace min/km'] >= i / 2:
        avg_hr.append(heartsum[0] / heartsum[1] if heartsum[1] else 0)
        count.append(heartsum[1])
        heartsum = [0, 0]
        i = i + 1
    heartsum[0] = heartsum[0] + out_df_norm.loc[x, 'heartRate']
    heartsum[1] = heartsum[1] + 1
    x = x + 1
# last open bin (0 if no record had a pace)
avg_hr.append(heartsum[0] / heartsum[1] if heartsum[1] else 0)
count.append(heartsum[1])
# Negated copy, drawn to the left of the axis.
avg_hr2 = [-value for value in avg_hr]
count2 = count
# One y position per bin, at the bin's upper bound (3.0, 3.5, ...), instead of
# a fixed 14-entry list.
y = [(6 + k) / 2 for k in range(len(avg_hr))]
# Tick labels are generated from the tick values so that the two always have
# the same length; the hand-written list lacked "6.0-6.5", which shifted every
# later label by one bin.
pace_tickvals = [k / 2 for k in range(6, 20)]
pace_ticktext = ["{:.1f}-{:.1f}".format(v - 0.5, v) for v in pace_tickvals]
fig = go.Figure()
# Edit the layout
fig.update_layout(title='BPM per pace in min/km comparison',
                  plot_bgcolor='rgba(0, 0, 0, 0)',
                  paper_bgcolor='rgba(0, 0, 0, 0)',
                  )
fig.update_layout(yaxis=go.layout.YAxis(
                      range=[3, 10],
                      tickvals=pace_tickvals,
                      ticktext=pace_ticktext,
                      title='pace in min/km',
                      showgrid=False),
                  xaxis=go.layout.XAxis(
                      range=[-220, 220],
                      tickvals=[-200, -150, -100, -50, 0, 50, 100, 150, 200],
                      # both sides are labelled with positive values
                      ticktext=[200, 150, 100, 50, 0, 50, 100, 150, 200],
                      title='BPM',
                      showgrid=True, gridcolor='LightGrey'),
                  barmode='overlay',
                  bargap=0.1,
                  showlegend=False)
fig.add_trace(go.Bar(y=y,
                     x=avg_hr,
                     orientation='h',
                     name='RUN A',
                     text=np.array(count).astype('int'),
                     hoverinfo='text',
                     marker=dict(color=count, cmin=3, cmax=max(count + count2),
                                 colorscale="Blues", colorbar=dict(title=".")),
                     ))
fig.add_trace(go.Bar(y=y,
                     x=avg_hr2,
                     orientation='h',
                     name='RUN B',
                     text=np.array(count).astype('int'),
                     hoverinfo='text',
                     marker=dict(color=count, cmin=3, cmax=max(count + count2),
                                 colorscale="Reds",
                                 colorbar=dict(title="number of records", x=1, y=0.5,
                                               showticklabels=False)),
                     ))
fig.show()
# Mean heart rate per 0.5 min/km pace bin, drawn as two grouped horizontal
# bars per bin, colored by the number of records in each bin.
out_df_norm = out_df_norm.sort_values("pace min/km")
out_df_norm = out_df_norm.reset_index(drop=True)
avg_hr = []
count = []
i = 6  # upper bound of the open bin in half-minutes: i / 2 = 3.0 min/km at the start
x = 0
heartsum = [0, 0]  # [sum of heartRate, number of records] of the open bin
# Rows without a pace (NaN) are sorted last, so the scan stops at the first one.
while (x < len(out_df_norm)) and not np.isnan(out_df_norm.loc[x, 'pace min/km']):
    # Close every bin whose upper bound has been reached; empty bins get 0.
    while out_df_norm.loc[x, 'pace min/km'] >= i / 2:
        avg_hr.append(heartsum[0] / heartsum[1] if heartsum[1] else 0)
        count.append(heartsum[1])
        heartsum = [0, 0]
        i = i + 1
    heartsum[0] = heartsum[0] + out_df_norm.loc[x, 'heartRate']
    heartsum[1] = heartsum[1] + 1
    x = x + 1
# last open bin (0 if no record had a pace)
avg_hr.append(heartsum[0] / heartsum[1] if heartsum[1] else 0)
count.append(heartsum[1])
# Both bar groups show the same values here (no negation in this version).
avg_hr2 = avg_hr
count2 = count
# One y position per bin, at the bin's upper bound (3.0, 3.5, ...), instead of
# a fixed 14-entry list.
y = [(6 + k) / 2 for k in range(len(avg_hr))]
# Tick labels are generated from the tick values so that the two always have
# the same length; the hand-written list lacked "6.0-6.5", which shifted every
# later label by one bin.
pace_tickvals = [k / 2 for k in range(6, 20)]
pace_ticktext = ["{:.1f}-{:.1f}".format(v - 0.5, v) for v in pace_tickvals]
fig = go.Figure()
# Edit the layout
fig.update_layout(title='BPM per pace in min/km comparison',
                  plot_bgcolor='rgba(0, 0, 0, 0)',
                  paper_bgcolor='rgba(0, 0, 0, 0)',
                  )
fig.update_layout(yaxis=go.layout.YAxis(
                      range=[3, 10],
                      tickvals=pace_tickvals,
                      ticktext=pace_ticktext,
                      title='pace in min/km',
                      showgrid=False),
                  xaxis=go.layout.XAxis(
                      range=[0, 220],
                      tickvals=[0, 50, 100, 150, 200],
                      ticktext=[0, 50, 100, 150, 200],
                      title='BPM',
                      showgrid=True, gridcolor='LightGrey'),
                  barmode='overlay',
                  bargap=0.1,
                  showlegend=False)
fig.add_trace(go.Bar(y=y,
                     x=avg_hr,
                     orientation='h',
                     name='RUN A',
                     text=np.array(count).astype('int'),
                     hoverinfo='text',
                     marker=dict(color=count, cmin=3, cmax=max(count + count2),
                                 colorscale="Blues", colorbar=dict(title=".")),
                     ))
fig.add_trace(go.Bar(y=y,
                     x=avg_hr2,
                     orientation='h',
                     name='RUN B',
                     text=np.array(count).astype('int'),
                     hoverinfo='text',
                     marker=dict(color=count, cmin=3, cmax=max(count + count2),
                                 colorscale="Reds",
                                 colorbar=dict(title="number of records", x=1, y=0.5,
                                               showticklabels=False)),
                     ))
# Switch to grouped bars and rotate the x-axis tick labels.
fig.update_layout(barmode='group', xaxis_tickangle=-45)
fig.show()